* Reset the session: drop the dataset in memory and turn off output paging.
clear
set more off

* Directories (UNC network paths) used throughout this do-file:
*   lecind    - folder read for the survey years 2008-2017
*   lecindsas - folder read for the survey years 2018-2019
*   lecindcsv - folder read for the survey year 2020 (same path as lecindsas)
*   lectabpas - folder holding the identifier passage table idind.dta
*   ecrind    - folder where every output dataset of this do-file is written
global lecind    "\\intra\partages\au_amic2\SRCV\TABLES_INITIALES\INPUT\INDIVIDUS"
global lecindsas "\\intra\partages\au_amic2\SRCV_AVRIL2021\TABLES_INITIALES_AVRIL2021\INDIVIDUS"
global lecindcsv "\\intra\partages\au_amic2\SRCV_AVRIL2021\TABLES_INITIALES_AVRIL2021\INDIVIDUS"
global lectabpas "\\intra\partages\au_amic2\SRCV_AVRIL2021\TABLES_INITIALES_AVRIL2021\SRCV_2020"
global ecrind    "\\intra\partages\au_amic2\SRCV_AVRIL2021\BASES_FEV2022_V5\2020\INDIVIDUS"


/* 1 - EXTRACTION OF THE SELECTED VARIABLES */
/* a - Stata files, first delivery 2008-2017 */
* Survey years 2008-2009: the input file name and the weight variable carry a
* zero-padded two-digit year (individus08_diff.dta, pond_08), whereas the
* output file keeps the unpadded loop value (indiv_8.dta).
forvalues an = 8/9 {
	local yy   "0`an'"
	local vars ind men age adultvrairep idmenc_* idindc_* pond_`yy' pb040 ///
		py010n py050n py100n py090n py110n py130n py120n py140n ///
		rb030

	* Read the yearly file and keep only the selected variables.
	cd "$lecind"
	use individus`yy'_diff.dta, clear
	keep `vars'

	* Tag every row with its four-digit survey year.
	gen annee_SRCV = 20`yy'
	order men ind annee_SRCV adultvrairep

	* Write the yearly extract, then echo the year and the row count.
	cd "$ecrind"
	save indiv_`an'.dta, replace
	display `an'
	count
}

* Survey years 2010-2011: same extraction, two-digit year used as is.
forvalues an = 10/11 {
	local vars ind men age adultvrairep idmenc_* idindc_* pond_`an' pb040 ///
		py010n py050n py100n py090n py110n py130n py120n py140n ///
		rb030

	* Read the yearly file and keep only the selected variables.
	cd "$lecind"
	use individus`an'_diff.dta, clear
	keep `vars'

	* Tag every row with its four-digit survey year.
	gen annee_SRCV = 20`an'
	order men ind annee_SRCV adultvrairep

	* Write the yearly extract, then echo the year and the row count.
	cd "$ecrind"
	save indiv_`an'.dta, replace
	display `an'
	count
}

* Survey years 2012-2013: the input files carry the "_diffv2" suffix.
forvalues an = 12/13 {
	local vars ind men age adultvrairep idmenc_* idindc_* pond_`an' pb040 ///
		py010n py050n py100n py090n py110n py130n py120n py140n ///
		rb030

	* Read the yearly file and keep only the selected variables.
	cd "$lecind"
	use individus`an'_diffv2.dta, clear
	keep `vars'

	* Tag every row with its four-digit survey year.
	gen annee_SRCV = 20`an'
	order men ind annee_SRCV adultvrairep

	* Write the yearly extract, then echo the year and the row count.
	cd "$ecrind"
	save indiv_`an'.dta, replace
	display `an'
	count
}

* Survey years 2014-2016: input files use the plain "_diff" suffix.
forvalues an = 14/16 {
	local vars ind men age adultvrairep idmenc_* idindc_* pond_`an' pb040 ///
		py010n py050n py100n py090n py110n py130n py120n py140n ///
		rb030

	* Read the yearly file and keep only the selected variables.
	cd "$lecind"
	use individus`an'_diff.dta, clear
	keep `vars'

	* Tag every row with its four-digit survey year.
	gen annee_SRCV = 20`an'
	order men ind annee_SRCV adultvrairep

	* Write the yearly extract, then echo the year and the row count.
	cd "$ecrind"
	save indiv_`an'.dta, replace
	display `an'
	count
}

* Survey year 2017: same extraction, with rb040 kept in addition to rb030.
foreach an in 17 {
	local vars ind men age adultvrairep idmenc_* idindc_* pond_`an' pb040 ///
		py010n py050n py100n py090n py110n py130n py120n py140n ///
		rb030 rb040

	* Read the yearly file and keep only the selected variables.
	cd "$lecind"
	use individus`an'_diff.dta, clear
	keep `vars'

	* Tag every row with its four-digit survey year.
	gen annee_SRCV = 20`an'
	order men ind annee_SRCV adultvrairep

	* Write the yearly extract, then echo the year and the row count.
	cd "$ecrind"
	save indiv_`an'.dta, replace
	display `an'
	count
}

/* b - SAS file, second delivery 2018-2019 */
* In these files the identifier, weight and income variables are upper case
* (IDMENC_*, POND_yy, PY010N, RB030, ...).
forvalues an = 18/19 {
	local vars ind men age adultvrairep IDMENC_* IDINDC_* POND_`an' PB040 ///
		PY010N PY050N PY100N PY090N PY110N PY130N PY120N PY140N ///
		RB030 RB040

	* Read the yearly file and keep only the selected variables.
	cd "$lecindsas"
	use individus_20`an'.dta, clear
	keep `vars'

	* Tag every row with its four-digit survey year.
	gen annee_SRCV = 20`an'
	order men ind annee_SRCV adultvrairep

	* Write the yearly extract, then echo the year and the row count.
	cd "$ecrind"
	save indiv_`an'.dta, replace
	display `an'
	count
}

/* c - CSV file, third delivery 2020 */
* ind, men, adultvrairep, IDMENC_*, IDINDC_* and POND_20 are deliberately not
* selected for 2020 (they were disabled in the selection list); PY021G, PY080N
* and PY080G are selected in addition to the usual income variables.
foreach an in 20 {
	local vars age PB040 ///
		PY010N PY050N PY100N PY090N PY110N PY130N PY120N PY140N PY021G PY080N PY080G ///
		RB030 RB040

	* Read the yearly file and keep only the selected variables.
	cd "$lecindcsv"
	use individus_20`an'.dta, clear
	keep `vars'

	* Tag every row with its four-digit survey year.
	gen annee_SRCV = 20`an'
	order RB030 RB040 annee_SRCV

	* Write the yearly extract and echo the year.
	cd "$ecrind"
	save indiv_`an'.dta, replace
	display `an'
}


/* 2 - VARIABLE RENAMING + REMOVAL OF THE NA VALUES + PASSAGE TABLE */
/* a - variable renaming */
* 2008-2017 extracts: income variables go to upper case, and the year-suffixed
* identifiers and weight are given year-independent names.
forvalues an = 8/17 {
	cd "$ecrind"
	use indiv_`an'.dta, clear

	* Income variables: lower case -> upper case, done in one grouped rename.
	rename (py010n py050n py090n py100n py110n py120n py130n py140n) ///
	       (PY010N PY050N PY090N PY100N PY110N PY120N PY130N PY140N)

	* Year-suffixed names -> common names.
	rename idmenc_* idmenc
	rename idindc_* idindc
	rename pond_*   pond_ind

	save indiv_`an'.dta, replace
}

* 2018-2019 extracts: year-suffixed upper-case identifiers and weight get the
* common lower-case names; PB040, RB030 and RB040 are switched to lower case.
forvalues an = 18/19 {
	cd "$ecrind"
	use indiv_`an'.dta, clear

	* Year-suffixed names -> common names.
	rename IDMENC_* idmenc
	rename IDINDC_* idindc
	rename POND_*   pond_ind

	* Upper case -> lower case, done in one grouped rename.
	rename (PB040 RB030 RB040) (pb040 rb030 rb040)

	save indiv_`an'.dta, replace
}

* 2020 extract: only PB040, RB030 and RB040 need to be switched to lower case.
foreach an in 20 {
	cd "$ecrind"
	use indiv_`an'.dta, clear
	rename (PB040 RB030 RB040) (pb040 rb030 rb040)
	save indiv_`an'.dta, replace
}

/* b - removal of the NA values */
* In the 2020 extract the variables below are expected to be strings in which
* a missing value is written as the literal text "NA". Each one is blanked
* where it equals "NA" and then converted to numeric.
use "$ecrind\indiv_20.dta", clear

local na_vars pb040 PY010N PY050N PY100N PY090N PY110N PY130N PY120N ///
	PY140N PY021G PY080N PY080G

foreach v of local na_vars {
	* A missing column is still an error: stop here rather than skip it.
	confirm variable `v'

	* Only a string column can contain "NA". Comparing a numeric column with
	* a string aborts with a type-mismatch error, so a column that is already
	* numeric is left untouched.
	capture confirm string variable `v'
	if _rc == 0 {
		replace `v' = "" if `v' == "NA"
		destring `v', replace
	}
}

save "$ecrind\indiv_20.dta", replace
* Debugging filter on a single individual, kept disabled:
*keep if rb030 == "0438440002"

/* c - passage table */
* Copy the passage table into the output folder with its key renamed to
* rb030, the name used for that variable in the 2020 extract.
use "$lectabpas\idind.dta", clear
rename RB030 rb030
save "$ecrind\idind.dta", replace

* Attach the passage-table columns to the 2020 individuals, matching on rb030.
use "$ecrind\indiv_20.dta", clear
count /* 24758 */

* m:1 treats the passage table as a lookup keyed by rb030: Stata stops with an
* error if rb030 is duplicated in idind.dta. An m:m merge would instead pair
* rows by their position within each key group and silently return a
* meaningless result in that situation.
* NOTE(review): assumes rb030 is unique in idind.dta - confirm with `isid rb030`.
merge m:1 rb030 using "$ecrind\idind.dta"

* Full merge result, _merge included, kept on disk for inspection.
save "$ecrind\merge_individus", replace

* Drop the passage-table individuals that do not exist in SRCV 2020.
drop if _merge == 2
drop _merge
order rb030 ssech IDINDC_17 IDINDC_18 ind IDINDC_19
save "$ecrind\indiv_20.dta", replace

* Frequency table of ssech after the merge.
tab ssech


/* 3 - STACKING OF THE YEARLY TABLES */
* Start from the 2008 extract and append 2009 to 2020 one year at a time.
use "$ecrind\indiv_8.dta", clear
forvalues an = 9/20 {
	append using "$ecrind\indiv_`an'.dta"
}
count /* 341 030 + 23 var. */

* Identifier columns first, then sort on the same leading keys and save.
order idmenc annee_SRCV ind men rb030 rb040
sort idmenc annee_SRCV ind men
save "$ecrind\individus_2008_2020.dta", replace


/* 4 - LABELS */
* Attach a descriptive label to each variable of the stacked file, then sort
* and overwrite the file.
use "$ecrind\individus_2008_2020.dta", clear

* Identifiers, survey year and demographic variables.
label var ind          "Identifiant longitudinal de l individu"
label var men          "Identifiant longitudinal du menage"
label var annee_SRCV   "annee de l'enquete/du fichier"
label var age          "Age de l individu a la date de l enquete"
label var adultvrairep "L individu a rempli un questionnaire individuel"
label var idmenc       "Identifiant transversal du menage"
label var idindc       "Identifiant transversal de l individu"
label var rb030        "Identifiant individuel europeen"
label var rb040        "Identifiant menage européen longitudinal - table individu"

* Weights.
label var pond_ind     "Ponderation transversale individus"
label var pb040        "Ponderation personnelle transversale européenne"

* Income components.
label var PY010N       "Salaire net percu"
label var PY021G       "Avantages en nature sous forme de voiture de fonction"
label var PY050N       "Benef/pertes nets especes activ indep"
label var PY080G       "Pensions versees par regimes prives de retraite individuelle - brutes"
label var PY080N       "Pensions versees par regimes prives de retraite individuelle - nettes"
label var PY090N       "Montant net allocations chomage"
label var PY100N       "Montant allocations vieillesse nettes"
label var PY110N       "Allocations de reversion nettes"
label var PY120N       "Indemnites de maladie nettes"
label var PY130N       "Pensions d'invalidite nettes"
label var PY140N       "Bourses d etudes nettes"

sort ind rb030 annee_SRCV
save "$ecrind\individus_2008_2020.dta", replace


/* 5 - REPLACEMENT OF THE MISSING 2020 idindc BY IDINDC_19, ELSE IDINDC_18, ELSE IDINDC_17 */
use "$ecrind\individus_2008_2020.dta", clear
order ind annee_SRCV idindc IDINDC_19 IDINDC_18 IDINDC_17 rb030 pond_ind men rb040

* indic_replace records which year supplied the identifier (19, 18 or 17) and
* stays missing for the rows whose idindc is left untouched.
gen indic_replace = .

* Most recent year first. Once a row has been filled its idindc is no longer
* empty, so an older year can only fill rows for which every more recent
* identifier was empty as well.
foreach yr in 19 18 17 {
	* The flag is set before idindc, while the condition is still true.
	replace indic_replace = `yr' if annee_SRCV == 2020 & idindc == "" & IDINDC_`yr' != ""
	replace idindc = IDINDC_`yr' if annee_SRCV == 2020 & idindc == "" & IDINDC_`yr' != ""
}

sort ind annee_SRCV idindc IDINDC_19 IDINDC_18 IDINDC_17 indic_replace rb030 pond_ind
order ind annee_SRCV idindc IDINDC_19 IDINDC_18 IDINDC_17 indic_replace rb030 pond_ind

* Number of 2020 rows filled from each year.
tab indic_replace

/* clean-up */
* The year-specific identifiers are no longer needed once idindc is filled.
* The data are already sorted and ordered by ind annee_SRCV at this point.
drop IDINDC_17 IDINDC_18 IDINDC_19
save "$ecrind\individus_2008_2020.dta", replace

* Write the survey-year-2020 rows of the stacked file to their own dataset.
use "$ecrind\individus_2008_2020.dta", clear
drop if annee_SRCV != 2020
save "$ecrind\fin_ind_2020.dta", replace

